// UMatch.cp
// UMatch.h
// ----------------------------------------------------------------------------------
// Matches regular expressions.
//
//
// Note: This file is proprietary and confidential to Art Pollard
//	and LexTek International.
// Copyright 1994 Art Pollard / LexTek International
//
//  Match the pattern PATTERN against the string TEXT;
//  return TRUE if it matches, FALSE otherwise.
//
//  A match means the entire string TEXT is used up in matching.
//
//  In the pattern string:
//       `*' matches any sequence of characters
//       `?' matches any character
//       [SET] matches any character in the specified set,
//       [!SET] or [^SET] matches any character not in the specified set.
//
//  Note: the standard regex character '+' (one or more) should be
//        simulated by using "?*", which is equivalent here.
//
//  A set is composed of characters or ranges; a range looks like
//  character hyphen character (as in 0-9 or A-Z).  [0-9a-zA-Z_] is the
//  minimal set of characters allowed in the [..] pattern construct.
//  Other characters are allowed (ie. 8 bit characters) if your system
//  will support them.
//
//  To suppress the special syntactic significance of any of `[]*?!^-\',
//  and match the character exactly, precede it with a `\'.
//
// ----------------------------------------------------------------------------------
// History:
// 		J. Kercheval		01/05/91
//			Original in public domain
//		Clark Goble			08/14/94
//			Checked it over and made a few modifications as part of the
//			C++ conversion.  Switch statements converted to if's for speed.
//			(PowerPC)
// ----------------------------------------------------------------------------------



#include "UMatch.h"

// Third result code used by regex_match() and regex_match_after_star() in
// addition to TRUE and FALSE.  It is returned when the text runs out before
// the pattern does or when a [..] set is malformed.  match() reports any
// result other than TRUE, including ABORT, as FALSE.
#define ABORT 2     /* end of search indicator */

// ----------------------------------------------------------------------------------
//	is_pattern	- Determine if the pattern has any wildcard characters
// ----------------------------------------------------------------------------------
// Scans the NUL-terminated string p from left to right.
// Returns TRUE as soon as an unescaped '?', '*' or '[' is seen.
// Returns FALSE if the end of the string is reached first.
// A backslash hides the character that follows it from this test; a backslash
// that is the last character of the string simply ends the scan.

BOOLEAN 
UMatch::is_pattern (char *p)
{
	while ( *p != '\0' )
	{
		const char current = *p++;

		// an unescaped wildcard makes this a pattern
		if ( current == '?' || current == '*' || current == '[' )
			return TRUE;

		if ( current == '\\' )
		{
			// nothing follows the backslash: end of string, no wildcard found
			if ( *p == '\0' )
				return FALSE;

			// step over the escaped character without inspecting it
			p++;
		}
	}

	return FALSE;
}

// ----------------------------------------------------------------------------------
//	regex_match	- match the pattern against a string
// ----------------------------------------------------------------------------------
// Matches the NUL-terminated pattern p against the NUL-terminated text t.
// Returns:
//		TRUE	the pattern matched and the whole text was used up
//		FALSE	a pattern element did not match, or text was left over
//		ABORT	the text ran out before the pattern did, or a [..] set is
//				malformed
//
// Note: ABORT travels through the BOOLEAN return type, so BOOLEAN is assumed
// to be able to hold the value 2 (it is declared outside this file - confirm
// against UMatch.h).

BOOLEAN 
UMatch::regex_match ( char *p, char *t )
{
	char range_start, range_end;	/* start and end in range */

	BOOLEAN invert;					/* is this [..] or [!..] */
	BOOLEAN member_match;			/* have I matched the [..] construct? */

	for ( ; *p; p++, t++ ) 
	{
		/* if this is the end of the text then this is the end of the match;
		   only a single trailing '*' may still be satisfied by empty text */
		if ( !*t ) {
			return ( *p == '*' && *++p == '\0' ) ? TRUE : ABORT;
		} // if EOF

		/* single any character match: consume one pattern char and one
		   text char.  This has to be `continue'; a `break' here would leave
		   the for loop, not a switch. */
		if ( *p == '?' )
			continue;

		/* multiple any character match */
		if ( *p == '*' )
			return regex_match_after_star (p, t);

		/* [..] construct, single member/exclusion character match */
		if ( *p == '[' )
		{
			/* move to beginning of range */
			p++;

			/* check if this is a member match or exclusion match */
			invert = FALSE;
			if ( *p == '!' || *p == '^' ) {
				invert = TRUE;
				p++;
			} // if

			/* if closing bracket here or at range start then we have a
			   malformed pattern */
			if ( *p == ']' )
				return ABORT;

			/* test the text character against each member of the set until
			   one matches or the closing bracket is reached */
			member_match = FALSE;
			while ( !member_match && *p != ']' )
			{
				/* matching a '!', '^', '-', a backslash or a ']' */
				if ( *p == '\\' )
					p++;
				range_start = range_end = *p;

				/* if end of pattern then bad pattern (Missing ']') */
				if ( !range_start )
					return ABORT;

				/* check for range bar */
				if ( *++p == '-' ) 
				{
					/* get the range end */
					range_end = *++p;

					/* if end of pattern or construct then bad pattern */
					if ( range_end == '\0' || range_end == ']' )
						return ABORT;

					/* special character range end */
					if ( range_end == '\\' )
					{
						range_end = *++p;

						/* a backslash with nothing after it is a bad pattern;
						   without this check p would step past the NUL */
						if ( !range_end )
							return ABORT;
					} // if

					/* move just beyond this range */
					p++;
				} // if

				/* if the text character is in range then match found.
				   make sure the range letters have the proper
				   relationship to one another before comparison */
				if ( range_start < range_end ) 
				{
					if ( *t >= range_start && *t <= range_end )
						member_match = TRUE;
				} // if
				else 
				{
					if ( *t >= range_end && *t <= range_start )
						member_match = TRUE;
				} // else
			} // while

			/* if there was a match in an exclusion set then no match */
			/* if there was no match in a member set then no match */
			if ( (invert && member_match) || !(invert || member_match) )
				return FALSE;

			/* if this is not an exclusion then skip the rest of the [...]
			   construct that already matched. */
			if ( member_match ) 
			{
				while ( *p != ']' ) 
				{
					/* bad pattern (Missing ']') */
					if ( !*p )
						return ABORT;

					/* skip exact match */
					if ( *p == '\\' ) 
					{
						p++;

						/* backslash at end of pattern: bad pattern */
						if ( !*p )
							return ABORT;
					} // if

					/* move to next pattern char */
					p++;
				} // while
			} // if

			/* p is now on the closing ']' and the text character has been
			   accepted; go on to the next pattern/text pair instead of
			   falling into the literal comparison below */
			continue;
		} // if [

		/* next character is quoted and must match exactly */
		if ( *p == '\\' )
		{
			/* move pattern pointer to quoted char and fall through */
			p++;
		} // if backslash

		/* must match this character exactly */
		if ( *p != *t )
			return FALSE;
	} // for

	/* if end of text not reached then the pattern fails */
	return !*t;
}


// ----------------------------------------------------------------------------------
//	regex_match_after_star	- match the remainder of a pattern following a '*'
// ----------------------------------------------------------------------------------
// Called by regex_match with p pointing at a '*' in the pattern and t at the
// current text position.  Skips the run of '*' and '?' (each '?' consumes one
// text character), then tries regex_match on the rest of the pattern at each
// successive text position until one call gives a definite result.
// Returns:
//		TRUE	the pattern ended after the wildcard run, or regex_match
//				matched the rest of the pattern at some text position
//		ABORT	the text ran out first, or regex_match returned ABORT

BOOLEAN 
UMatch::regex_match_after_star (char *p, char *t)
{
	/* pass over existing ? and * in pattern */
	while ( *p == '?' || *p == '*' ) 
	{
		/* take one char for each ? */
		if ( *p == '?' ) 
		{
			/* if end of text then no match */
			if ( *t == '\0' )
				return ABORT;
			t++;
		}

		/* move to next char in pattern */
		p++;
	}

	/* if end of pattern we have matched regardless of text left */
	if ( !*p )
		return TRUE;

	/* get the next character to match which must be a literal or '['.
	   Only an unescaped '[' opens a set; an escaped one is an ordinary
	   literal and is compared against the text like any other character. */
	int nextp = *p;
	BOOLEAN set_start = FALSE;
	if ( nextp == '[' )
		set_start = TRUE;
	else if ( nextp == '\\' )
		nextp = p[1];

	/* Continue until we run out of text or definite result seen */
	for ( ; ; t++ ) 
	{
		/* a precondition for matching is that the next character
		   in the pattern match the next character in the text or that
		   the next pattern char is the beginning of a range */
		if ( set_start || nextp == *t ) 
		{
			BOOLEAN match = regex_match(p, t);

			/* TRUE and ABORT are both final; only FALSE keeps scanning */
			if ( match != FALSE )
				return match;
		}

		/* if the end of text is reached then no match */
		if ( *t == '\0' )
			return ABORT;
	}
}

// ----------------------------------------------------------------------------------
//	match	- match a pattern against a text string
// ----------------------------------------------------------------------------------
// This is the main routine.  It hands pattern p and text t to regex_match and
// returns TRUE only when regex_match returns TRUE.  Every other result,
// including ABORT, is reported as FALSE.

BOOLEAN 
UMatch::match( char *p, char *t)
{
    if ( regex_match(p,t) == TRUE )
        return TRUE;

    return FALSE;
}

// ----------------------------------------------------------------------------------

#ifdef TEST

/*
* This test main expects as first arg the pattern and as second arg
* the match string.  Output is yea or nay on match.
*/

#include <stdio.h>

// Test driver: argv[1] is the pattern, argv[2] is the text to match.
// Prints one line describing the outcome and always returns 0.
// NOTE(review): both UMatch members are called without an object, so they are
// assumed to be declared static in UMatch.h - confirm.
int main(int argc, char *argv[])
{
	// exactly two arguments (pattern and text) are required
	if (argc != 3) {
		printf("    Bad Breath\n");
		return 0;
	}

	// a pattern with no wildcard characters is rejected
	if (!UMatch::is_pattern(argv[1])) {
		printf("    Bad Pattern\n");
		return 0;
	}

	if (UMatch::match(argv[1], argv[2])) {
		printf("    Match Successful\n");
	} else {
		printf("    Match Fails\n");
	}
	return 0;
}

#endif



